R packages.
library(fda)
library(ggplot2)
pm10 <- read.csv("MPM1017.csv")[, -1]
no2 <- read.csv("MNO217.csv")[, -1]
# no2 <- no2[, -c(1, 3)]
plot1 <-
reshape2::melt(pm10) %>%
dplyr::select(Station = variable,
PM10 = value) %>%
mutate(X = rep(seq(nrow(pm10)),
ncol(pm10))) %>%
ggplot(aes_string(x = "X",
y = "PM10",
color = "Station")) +
geom_line() +
theme_light() +
scale_color_viridis_d()
## No id variables; using all as measure variables
plot2 <-
reshape2::melt(no2) %>%
dplyr::select(Station = variable,
NO2 = value) %>%
mutate(X = rep(seq(nrow(no2)),
ncol(no2))) %>%
ggplot(aes_string(x = "X",
y = "NO2",
color = "Station")) +
geom_line() +
theme_light() +
scale_color_viridis_d()
## No id variables; using all as measure variables
plotly::ggplotly(plot1)
plotly::ggplotly(plot2)
Create basis
npm10 <- nrow(pm10)
nno2 <- nrow(no2)
base_bspline <- create.bspline.basis(c(1, npm10), 15)
base_fu <- create.fourier.basis(c(1, nno2), 15)
class(base_bspline)
## [1] "basisfd"
class(base_fu)
## [1] "basisfd"
methods(class = "basisfd")
## [1] * == norder plot predict summary
## see '?methods' for accessing help and source code
methods(class = "basisfd")
## [1] * == norder plot predict summary
## see '?methods' for accessing help and source code
Regression Spline
Define a Functional Parameter Object
param_lambda <- fdPar(base_bspline, lambda = 0)
fd_smooth <- smooth.basis(argvals = seq(1, npm1o),
y = as.matrix(pm10),
fdParobj = param_lambda)
methods(class = "fdPar")
## [1] boxplot coef coefficients plot predict
## [6] summary
## see '?methods' for accessing help and source code
Smooth basis
fd_smooth <- smooth.basis(argvals = seq(1, npm1o),
y = as.matrix(pm10),
fdParobj = param_lambda)
methods(class = "fdPar")
## [1] boxplot coef coefficients plot predict
## [6] summary
## see '?methods' for accessing help and source code
Spline smoothing
Define a Functional Parameter Object
base_bspline <- create.bspline.basis(c(1, npm10), 15)
param_lambda <- fdPar(base_bspline,
2,
lambda = 0.1)
methods(class = "fdPar")
## [1] boxplot coef coefficients plot predict
## [6] summary
## see '?methods' for accessing help and source code
Smooth basis
fd_smooth <- smooth.basis(argvals = seq(1, npm1o),
y = as.matrix(pm10),
fdParobj = param_lambda)
methods(class = "fdSmooth")
## [1] as.fd boxplot coef coefficients fitted
## [6] lines plot predict residuals
## see '?methods' for accessing help and source code
Lambda Selection.
gcv <- NULL
lambda_list <- seq(-2, 4, length.out = 30)
matrix_data <- as.matrix(pm10)
for (lambda in lambda_list) {
lambda <- 10**lambda
param_lambda <- fdPar(base_bspline,
2,
lambda)
fd_smooth <- smooth.basis(argvals = seq(1,
nrow(matrix_data)),
y = matrix_data,
fdParobj = param_lambda)
gcv <- c(gcv, sum(fd_smooth$gcv))
}
plotgcv <-
data.frame(log_lambda = lambda_list,
gcv = gcv) %>%
ggplot(aes(x = log_lambda, y = gcv)) +
geom_line(color = "purple", linetype = "dashed") +
theme_light() +
scale_x_continuous(limits = c(0, 4))
plotly::ggplotly(plotgcv)
selected_lamdba <- 10**lambda_list[which.min(gcv)]
print(selected_lamdba)
## [1] 573.6153
print(log(selected_lamdba, 10))
## [1] 2.758621
param_lambda <- fdPar(base_bspline,
2,
lambda = selected_lamdba)
fd_smooth <- smooth.basis(argvals = seq(1,
nrow(matrix_data)),
y = matrix_data,
fdParobj = param_lambda)
Mean and covariance
summary(fd_smooth)
## Length Class Mode
## fd 3 fd list
## df 1 -none- numeric
## gcv 15 -none- numeric
## beta 0 -none- NULL
## SSE 1 -none- numeric
## penmat 225 -none- numeric
## y2cMap 4515 -none- numeric
## argvals 301 -none- numeric
## y 4515 -none- numeric
summary(fd_smooth$fd)
## Functional data object:
##
## Dimensions of the data:
## time
## reps
## values
##
## Basis object:
##
## Type: bspline
##
## Range: 1 to 301
##
## Number of basis functions: 15
##
## Order of spline: 4
## [1] " Interior knots"
## [1] 26 51 76 101 126 151 176 201 226 251 276
##
## Coefficient matrix:
## AJM ATI CAM CHO HGM IZT MER
## bspl4.1 34.110501 31.83064 50.20089 70.37051 39.08153 46.98081 48.34281
## bspl4.2 32.477066 50.98302 73.67716 54.98442 54.73972 34.26232 68.63172
## bspl4.3 49.143241 61.41435 71.62944 93.33580 66.47449 46.84670 83.83257
## bspl4.4 48.240887 54.91478 66.26488 49.29242 39.09159 41.95314 60.22371
## bspl4.5 36.784271 52.46350 51.01799 54.29874 50.12077 33.54044 44.38670
## bspl4.6 59.387184 60.82071 92.89793 74.44615 61.28273 53.34583 87.00475
## bspl4.7 27.305529 25.19857 41.28708 51.23261 31.57539 30.85612 50.72208
## bspl4.8 31.299420 41.68267 49.76532 66.30478 36.75911 30.50820 50.16534
## bspl4.9 32.796806 35.38682 33.98313 41.03040 28.41548 27.18365 38.96741
## bspl4.10 32.218024 32.11503 51.00969 29.57862 40.85753 31.32665 53.96163
## bspl4.11 34.189246 35.81296 35.78263 67.72373 26.42651 29.74819 37.76812
## bspl4.12 4.960169 19.64149 25.80655 14.28972 23.97862 25.31323 31.93691
## bspl4.13 55.665066 53.06209 62.38848 49.59352 40.80872 34.99670 59.29485
## bspl4.14 21.586488 24.83213 38.32911 67.65709 26.12782 29.63947 33.50390
## bspl4.15 11.311397 18.72002 48.88571 31.16112 38.53841 28.15478 53.17658
## MPA SAG SFE TAH TLA TLI VIF
## bspl4.1 51.17289 72.45661 39.09742 66.06110 46.68763 44.31135 62.97789
## bspl4.2 41.84901 48.31982 31.46336 20.95821 65.66030 78.37847 91.99489
## bspl4.3 58.39260 129.71638 51.41534 75.70928 68.30748 47.21604 54.91774
## bspl4.4 54.61263 67.70804 44.39371 62.12911 57.93407 77.52482 84.87685
## bspl4.5 44.67746 52.30892 41.32273 36.16466 61.72442 52.55434 64.63194
## bspl4.6 52.43237 88.65200 60.51195 70.21679 80.12129 70.34230 71.85168
## bspl4.7 30.66939 44.00495 25.87816 38.15850 31.67644 37.27066 38.17742
## bspl4.8 43.40051 50.28220 31.91501 47.66555 50.61078 41.35442 48.41801
## bspl4.9 31.35076 40.09806 30.48199 30.87800 34.96751 43.94444 42.59748
## bspl4.10 31.98917 40.07873 34.94306 37.01859 48.11265 23.84916 34.69820
## bspl4.11 48.72591 39.56231 30.06635 34.98404 38.78770 58.67257 59.12485
## bspl4.12 10.93356 40.09187 11.55308 42.44577 25.28165 23.66874 34.93287
## bspl4.13 50.63609 39.75644 61.16406 33.40438 68.87154 43.97740 31.80344
## bspl4.14 26.86598 49.54702 27.39930 47.14853 51.07971 37.14566 33.66057
## bspl4.15 10.16847 52.15154 22.33607 22.39165 45.08720 34.48901 49.86364
## XAL
## bspl4.1 57.58899
## bspl4.2 104.95805
## bspl4.3 86.77519
## bspl4.4 85.78762
## bspl4.5 60.26297
## bspl4.6 121.60980
## bspl4.7 48.09534
## bspl4.8 50.26583
## bspl4.9 54.19862
## bspl4.10 40.21468
## bspl4.11 51.42980
## bspl4.12 45.11898
## bspl4.13 54.03768
## bspl4.14 41.01061
## bspl4.15 91.97008
class(fd_smooth$fd)
## [1] "fd"
methods(class = "fd")
## [1] - [ * ^ +
## [6] boxplot coef coefficients density deriv
## [11] fRegress lines mean norder plot
## [16] predict sqrt sum summary
## see '?methods' for accessing help and source code
mean_function <- mean.fd(fd_smooth$fd)
plot(mean_function)

## [1] "done"
variance_fda <- function(variance_surface,
nda,
npunt,
x_lab = "X",
y_lab = "Y",
z_lab = "Z") {
eva <- seq(1, nda, length = npunt)
super_eval <- eval.bifd(eva, eva, variance_surface);
persp(eva, eva, super_eval,
theta = -45, phi = 25, r = 3, expand = 0.5,
ticktype = "detailed",
xlab = x_lab,
ylab = y_lab,
zlab = z_lab)
}
var_fd <- var.fd(fd_smooth$fd)
variance_fda(var.fd(fd_smooth$fd), npm1o, 200)

FPCA
pcapm10 <- pca.fd(fd_smooth$fd,
nharm = 2,
harmfdPar = fdPar(fd_smooth$fd))
str(pcapm10)
## List of 5
## $ harmonics:List of 3
## ..$ coefs : num [1:15, 1:2] 0.049 0.1122 0.0917 0.0688 0.0415 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:15] "bspl4.1" "bspl4.2" "bspl4.3" "bspl4.4" ...
## .. .. ..$ : chr [1:2] "PC1" "PC2"
## ..$ basis :List of 10
## .. ..$ call : language basisfd(type = type, rangeval = rangeval, nbasis = nbasis, params = params, dropind = dropind, quadvals = qu| __truncated__
## .. ..$ type : chr "bspline"
## .. ..$ rangeval : num [1:2] 1 301
## .. ..$ nbasis : num 15
## .. ..$ params : num [1:11] 26 51 76 101 126 151 176 201 226 251 ...
## .. ..$ dropind : NULL
## .. ..$ quadvals : NULL
## .. ..$ values : list()
## .. ..$ basisvalues: list()
## .. ..$ names : chr [1:15] "bspl4.1" "bspl4.2" "bspl4.3" "bspl4.4" ...
## .. ..- attr(*, "class")= chr "basisfd"
## ..$ fdnames:List of 3
## .. ..$ : chr [1:15] "bspl4.1" "bspl4.2" "bspl4.3" "bspl4.4" ...
## .. ..$ : chr [1:2] "PC1" "PC2"
## .. ..$ : chr "values"
## ..- attr(*, "class")= chr "fd"
## $ values : num [1:15] 25004 1765 1395 1017 539 ...
## $ scores : num [1:15, 1:2] -214 -105 100 107 -116 ...
## $ varprop : num [1:2] 0.813 0.0574
## $ meanfd :List of 3
## ..$ coefs : num [1:15, 1] 50.8 56.9 69.7 59.7 49.1 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr "mean"
## ..$ basis :List of 10
## .. ..$ call : language basisfd(type = type, rangeval = rangeval, nbasis = nbasis, params = params, dropind = dropind, quadvals = qu| __truncated__
## .. ..$ type : chr "bspline"
## .. ..$ rangeval : num [1:2] 1 301
## .. ..$ nbasis : num 15
## .. ..$ params : num [1:11] 26 51 76 101 126 151 176 201 226 251 ...
## .. ..$ dropind : NULL
## .. ..$ quadvals : NULL
## .. ..$ values : list()
## .. ..$ basisvalues: list()
## .. ..$ names : chr [1:15] "bspl4.1" "bspl4.2" "bspl4.3" "bspl4.4" ...
## .. ..- attr(*, "class")= chr "basisfd"
## ..$ fdnames:List of 3
## .. ..$ time : int [1:301] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ reps : chr "mean"
## .. ..$ values: chr "mean value"
## ..- attr(*, "class")= chr "fd"
## - attr(*, "class")= chr "pca.fd"
class(pcapm10)
## [1] "pca.fd"
methods(class = "pca.fd")
## [1] plot
## see '?methods' for accessing help and source code
plot(pcapm10)


print(pcapm10$scores)
## [,1] [,2]
## [1,] -213.76428 12.064608
## [2,] -104.94160 -21.122787
## [3,] 100.45805 41.592457
## [4,] 107.48669 24.076751
## [5,] -116.31566 8.395449
## [6,] -222.05214 5.067746
## [7,] 107.77446 53.016242
## [8,] -133.86990 -36.497651
## [9,] 185.99390 -3.479708
## [10,] -193.41326 33.565043
## [11,] -48.29056 2.572591
## [12,] 60.84631 43.467741
## [13,] 18.78715 -63.602167
## [14,] 115.87097 -110.137182
## [15,] 335.42988 11.020866
print(pcapm10$params)
## NULL
plot(pcapm10$scores[, 1], pcapm10$scores[, 2])

str(pcapm10$varprop)
## num [1:2] 0.813 0.0574
Functional BOX_PLOT
ll <- boxplot.fd(fd_smooth$fd, method = "MBD")

ll$depth
## AJM ATI CAM CHO HGM IZT MER
## 0.3075908 0.5194719 0.4793022 0.4221594 0.4780764 0.2380009 0.4279114
## MPA SAG SFE TAH TLA TLI VIF
## 0.4585573 0.4244224 0.3338048 0.5537954 0.5008015 0.5489863 0.4310231
## XAL
## 0.2094295
ll$medcurve
## TAH
## 11
cov.fun = function(d,k,c,mu){
k*exp(-c*d^mu)
}
n=50
p=30
t=seq(0,1,len=p)
d=dist(t,upper=TRUE,diag=TRUE)
d.matrix=as.matrix(d) #
#covariance function in time
t.cov = cov.fun(d.matrix,1,1,1)
# Cholesky Decomposition
L=chol(t.cov)
mu=4*t
e=matrix(rnorm(n*p),p,n)
ydata = mu+t(L)%*%e
#functional boxplot
fbplot(ydata, method='MBD', ylim=c(-11,15))

## $depth
## [1] 0.22753741 0.39063946 0.17937415 0.34802721 0.47711565 0.40315646
## [7] 0.40821769 0.07385034 0.44032653 0.40642177 0.47918367 0.34682993
## [13] 0.47308844 0.39961905 0.37921088 0.40723810 0.24440816 0.21921088
## [19] 0.37148299 0.42634014 0.46628571 0.21142857 0.30302041 0.33703401
## [25] 0.42029932 0.29034014 0.37948299 0.42835374 0.46013605 0.37365986
## [31] 0.16125170 0.48146939 0.48832653 0.28065306 0.32195918 0.43635374
## [37] 0.47542857 0.38862585 0.22840816 0.23140136 0.39662585 0.48146939
## [43] 0.47200000 0.38563265 0.21648980 0.24549660 0.36832653 0.44598639
## [49] 0.34911565 0.37365986
##
## $outpoint
## [1] 8
##
## $medcurve
## [1] 33
##
## Model 2 with outliers
##
#magnitude
k=6
#randomly introduce outliers
C=rbinom(n,1,0.1)
s=2*rbinom(n,1,0.5)-1
cs.m=matrix(C*s,p,n,byrow=TRUE)
e=matrix(rnorm(n*p),p,n)
y=mu+t(L)%*%e+k*cs.m
#functional boxplot
fbplot(y,method='MBD',ylim=c(-11,15))

## $depth
## [1] 0.05420408 0.47559184 0.43771429 0.40604082 0.46759184 0.40429932
## [7] 0.47025850 0.41170068 0.45855782 0.40228571 0.36936054 0.47749660
## [13] 0.47210884 0.07776871 0.38062585 0.31597279 0.44718367 0.31902041
## [19] 0.15221769 0.46438095 0.44440816 0.47842177 0.20952381 0.49774150
## [25] 0.07765986 0.48364626 0.12870748 0.47765986 0.45942857 0.42672109
## [31] 0.10405442 0.04391837 0.34688435 0.47531973 0.38993197 0.27096599
## [37] 0.31727891 0.45518367 0.43787755 0.41714286 0.17289796 0.28925170
## [43] 0.43874830 0.29317007 0.20435374 0.50454422 0.39151020 0.44740136
## [49] 0.44919728 0.40206803
##
## $outpoint
## [1] 1 14 19 25 27 31 32 41
##
## $medcurve
## [1] 46